%load_ext autoreload
%autoreload 2
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gymnasium as gym
from utils.riskenv import (RiskManagementEnv,
                           RiskManagementEnvMonthlyEpisodes,
                           RiskManagementEnvDynaState)
from utils.common import (create_logger, predict_from_env, predict_from_frame_v2,
                          predict_from_frame, running_mean_last_n_samples,
                          plot_actions_running_mean, plot_reward_history,
                          plot_default_rate_history, get_csv_files, exponential_decay_schedule)
from utils.networks import ConvNetwork, FCNetwork, LSTMNetwork, GRUNetwork, GRUNetworkBidirectional
from utils.constants import date_code_mapping
from sklearn.kernel_approximation import RBFSampler
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import SubprocVecEnv
from stable_baselines3 import A2C, PPO, DQN
from sb3_contrib import TRPO, RecurrentPPO, QRDQN
from sklearn.preprocessing import MinMaxScaler, Normalizer
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from sklearn.metrics import (f1_score, recall_score, mean_absolute_error,
                             mean_squared_error, r2_score)
import warnings
warnings.filterwarnings("ignore")
sns.set_theme(style='whitegrid', context='paper', font_scale=1., palette='muted')
# fixed random seed
SEED = 123
rng = np.random.default_rng(SEED)
fpath = "../data/dataset_1/processed/train_subset.csv"
train_subset = pd.read_csv(fpath)
original_features = ['mispay_days',
                     'mispay_d_90d_12m',
                     'total_balance',
                     'income',
                     'age',
                     'mortgage',
                     'mortgage_qty',
                     'maturity_months',
                     'monthly_payment',
                     'subsidy',
                     'interest_rate']
default_col = 'default_120_12m'
dates_from_codes = {v: k for k, v in date_code_mapping.items()}
# Environment input convention: date_code is the 1st index level, client_id the 2nd.
train_subset = train_subset.sort_values(by=['date_code', 'client_id']).set_index(["date_code", "client_id"])
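With this MultiIndex layout a (date_code, client_id) pair addresses one client-month row; a quick illustration:
first_month = train_subset.loc[0]          # all clients at the first date_code
some_client = first_month.index[0]         # an existing client_id
row = train_subset.loc[(0, some_client)]   # one client-month observation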
# RBF feature transformation
FEATURE_EXPANSION = False
if FEATURE_EXPANSION:
    n_components = 200
    rbf_feature = RBFSampler(gamma=1, n_components=n_components, random_state=SEED)
    features = [f"x_{x}" for x in range(n_components)]
    train_transformed = pd.DataFrame(rbf_feature.fit_transform(train_subset[original_features]), columns=features)
    train_subset = train_subset[["default_120_12m"]].reset_index().merge(train_transformed, left_index=True, right_index=True)
    train_subset = train_subset.sort_values(by=['date_code', 'client_id']).set_index(["date_code", "client_id"])
# Define `features` unconditionally so later cells work even when SCALE is False
features = original_features if not FEATURE_EXPANSION else features
SCALE = True
if SCALE:
    scaler = MinMaxScaler((-1, 1))
    train_subset[features] = scaler.fit_transform(train_subset[features])
    train_subset[features] = train_subset[features].clip(-1, 1)
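The fitted scaler must be reused verbatim at evaluation time to avoid train/serve skew; a minimal persistence sketch with joblib (the file path is an assumption):
import joblib
if SCALE:
    joblib.dump(scaler, "output/binary/minmax_scaler.joblib")  # persist for inference
# later, before scoring new data:
# scaler = joblib.load("output/binary/minmax_scaler.joblib")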
LEARNING_STEPS = len(train_subset)
# Define environment configuration
CustomEnv = RiskManagementEnvMonthlyEpisodes
kwargs = {
    'features_col': features,    # feature columns
    'default_col': default_col,  # default column
    'obs_dim': len(features),
    'action_dim': 3,             # discrete actions (0, 1, 2)
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
check_env(CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs), warn=True)
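Monitor is imported above but never applied; a minimal sketch (assuming the same constructor arguments as the env_fns built below) of wrapping each environment so episode returns and lengths are recorded for SB3's logger:
env_fns = [lambda: Monitor(CustomEnv(train_subset, debug=True, scaled_features=SCALE,
                                     seed=SEED, rng=rng, accepts_discrete_action=True,
                                     **kwargs))
           for _ in range(num_envs)]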
from utils.duelingdqn import DuelingDQNPolicy
kwargs = {
    'features_col': features,          # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,                   # actions 0, 1
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
num_envs = 1
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
log_path = "logs/binary/dueldqn"
logger = create_logger(log_path)
feature_extractor = GRUNetwork
# Now use the custom network in policy_kwargs
policy_kwargs = dict(
    features_extractor_class=feature_extractor,
    features_extractor_kwargs=dict(first_layer=400,
                                   first_layer_dropout=0.1,
                                   output_layer=400,
                                   output_layer_dropout=0.1)
)
model_dueldqn = DQN(DuelingDQNPolicy,
                    training_env,
                    verbose=1,
                    device="cpu",
                    learning_rate=1e-3,
                    gamma=0.9,
                    policy_kwargs=policy_kwargs,
                    buffer_size=1_000_000,
                    learning_starts=100,
                    batch_size=64,
                    tau=1.0,
                    train_freq=2)
model_dueldqn.set_logger(logger)
model_dueldqn.learn(LEARNING_STEPS*3, log_interval=1)
model_dueldqn.save("output/binary/dueldqn")
Using cpu device
default: 0.333 - penalty: -1.70 reward: -1.58 - client: 207.0 month: 1 - done: True
default: 0.342 - penalty: -1.83 reward: 3.61 - client: 394.0 month: 2 - done: True
default: 0.342 - penalty: -1.84 reward: 3.61 - client: 470.0 month: 3 - done: True
[... per-episode debug output truncated ...]
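The saved agent can be restored later without retraining; a minimal sketch (the custom DuelingDQNPolicy class must be importable in the loading process):
model_dueldqn = DQN.load("output/binary/dueldqn", env=training_env, device="cpu")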
# Roll out the trained policy to collect rewards and per-row predictions
n_steps = len(train_subset)
rew_dueldqn, _ = predict_from_env(model_dueldqn, training_env, n_steps)
act_dueldqn, date_codes = predict_from_frame(model_dueldqn, train_subset[features])
action: [0] - reward : [2.54939101] - done: [False]
action: [1] - reward : [3.5383097] - done: [False]
action: [0] - reward : [2.5302361] - done: [False]
[... step-level prediction output truncated ...]
plot_default_rate_history(date_codes=date_codes,
                          actual=train_subset[default_col],
                          actions=act_dueldqn,
                          dates_from_codes=dates_from_codes,
                          save_path="../imgs/2_modeling/dfr_history_dueldqn.png",
                          title="Dueling DQN - Actual vs. Predicted")
plot_reward_history(reward=rew_dueldqn,
                    window_size=100,
                    save_path="../imgs/2_modeling/rew_history_dueldqn.png",
                    title="Dueling DQN - Reward")
plot_actions_running_mean(actions=act_dueldqn,
                          window_size=100,
                          save_path="../imgs/2_modeling/act_run_mean_dueldqn.png",
                          title="Dueling DQN - Actions Running Mean (i.e. default rate)")
# sklearn's confusion_matrix puts true labels on rows, predictions on columns:
# [[True Negative,  False Positive],
#  [False Negative, True Positive]]
print("-"*30)
print(pd.DataFrame(confusion_matrix(train_subset[default_col].values, act_dueldqn), columns=["Pred 0", "Pred 1"], index=["True 0", "True 1"]))
print("-"*30)
print(classification_report(train_subset[default_col].values, act_dueldqn))
------------------------------
        Pred 0  Pred 1
True 0    6394      57
True 1       5    3192
------------------------------
              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00      6451
         1.0       0.98      1.00      0.99      3197

    accuracy                           0.99      9648
   macro avg       0.99      0.99      0.99      9648
weighted avg       0.99      0.99      0.99      9648
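The same numbers can be pulled out programmatically with the metric functions already imported (a sketch):
acc = accuracy_score(train_subset[default_col].values, act_dueldqn)
f1 = f1_score(train_subset[default_col].values, act_dueldqn)
rec = recall_score(train_subset[default_col].values, act_dueldqn)
print(f"accuracy={acc:.3f}  f1={f1:.3f}  recall={rec:.3f}")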
from utils.doubledqn import DoubleDQN
kwargs = {
    'features_col': features,          # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,                   # actions 0, 1
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
num_envs = 1
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, rng=rng, seed=SEED, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
log_path = "logs/binary/ddqn"
logger = create_logger(log_path)
feature_extractor = GRUNetwork
# Now use the custom network in policy_kwargs
policy_kwargs = dict(
    features_extractor_class=feature_extractor,
    features_extractor_kwargs=dict(first_layer=400,
                                   first_layer_dropout=0.1,
                                   output_layer=400,
                                   output_layer_dropout=0.1)
)
model_ddqn = DoubleDQN("MlpPolicy",
                       training_env,
                       verbose=1,
                       device="cpu",
                       learning_rate=1e-3,
                       gamma=0.9,
                       policy_kwargs=policy_kwargs,
                       buffer_size=1_000_000,
                       learning_starts=100,
                       batch_size=64,
                       tau=1.0,
                       train_freq=2)
model_ddqn.set_logger(logger)
model_ddqn.learn(LEARNING_STEPS*3, log_interval=1)
model_ddqn.save("output/binary/ddqn")
Using cpu device
default: 0.333 - penalty: -1.56 reward: 2.69 - client: 207.0 month: 1 - done: True
default: 0.342 - penalty: -1.78 reward: 3.62 - client: 394.0 month: 2 - done: True
[... per-episode debug output truncated ...]
n_steps = len(train_subset)
rew_ddqn, _ = predict_from_env(model_ddqn, training_env, n_steps)
act_ddqn, date_codes = predict_from_frame(model_ddqn, train_subset[features])
action: [0] - reward : [2.54553596] - done: [False]
action: [1] - reward : [3.5383097] - done: [False]
[... step-level prediction output truncated ...]
plot_default_rate_history(date_codes=date_codes,
                          actual=train_subset[default_col],
                          actions=act_ddqn,
                          dates_from_codes=dates_from_codes,
                          save_path="../imgs/2_modeling/dfr_history_ddqn.png",
                          title="Double DQN - Actual vs. Predicted")
plot_reward_history(reward=rew_ddqn,
                    window_size=10,
                    save_path="../imgs/2_modeling/rew_history_ddqn.png",
                    title="Double DQN - Reward")
plot_actions_running_mean(actions=act_ddqn,
                          window_size=64,
                          save_path="../imgs/2_modeling/act_run_mean_ddqn.png",
                          title="Double DQN - Actions Running Mean (i.e. default rate)")
# Rows are true labels, columns are predictions:
# [[True Negative,  False Positive],
#  [False Negative, True Positive]]
print("-"*30)
print(pd.DataFrame(confusion_matrix(train_subset[default_col].values, act_ddqn), columns=["Pred 0", "Pred 1"], index=["True 0", "True 1"]))
print("-"*30)
print(classification_report(train_subset[default_col].values, act_ddqn))
------------------------------
        Pred 0  Pred 1
True 0    6364      87
True 1       2    3195
------------------------------
              precision    recall  f1-score   support

         0.0       1.00      0.99      0.99      6451
         1.0       0.97      1.00      0.99      3197

    accuracy                           0.99      9648
   macro avg       0.99      0.99      0.99      9648
weighted avg       0.99      0.99      0.99      9648
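evaluate_policy (imported above) offers an aggregate reward check without the manual rollout helpers; a minimal sketch (the env should report episode statistics, e.g. via the Monitor wrapper sketched earlier):
mean_reward, std_reward = evaluate_policy(model_ddqn, training_env,
                                          n_eval_episodes=10, deterministic=True)
print(f"mean episode reward: {mean_reward:.2f} +/- {std_reward:.2f}")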
kwargs = {
    'features_col': features,          # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,                   # actions 0, 1
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
num_envs = 1
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
log_path = "logs/binary/dqn"
logger = create_logger(log_path)
feature_extractor = GRUNetwork
# Now use the custom network in policy_kwargs
policy_kwargs = dict(
    features_extractor_class=feature_extractor,
    features_extractor_kwargs=dict(first_layer=400,
                                   first_layer_dropout=0.3,
                                   output_layer=400,
                                   output_layer_dropout=0.1)
)
model_dqn = DQN("MlpPolicy",
                training_env,
                verbose=1,
                device="cpu",
                seed=SEED,
                learning_rate=1e-3,
                gamma=0.9,
                policy_kwargs=policy_kwargs,
                buffer_size=1_000_000,
                learning_starts=100,
                batch_size=64,
                tau=1.0,
                train_freq=2)
model_dqn.set_logger(logger)
model_dqn.learn(LEARNING_STEPS*3, log_interval=1)
model_dqn.save("output/binary/dqn")
Using cpu device
default: 0.333 - penalty: -1.74 reward: 2.66 - client: 207.0 month: 1 - done: True
default: 0.342 - penalty: -1.88 reward: 3.60 - client: 394.0 month: 2 - done: True
[... per-episode debug output truncated ...]
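A note on the constant learning_rate=1e-3 above: Stable-Baselines3 also accepts a schedule, i.e. a callable of progress_remaining (1.0 at the start of training, 0.0 at the end). The exponential_decay_schedule helper imported earlier presumably builds such a callable; here is a self-contained linear-decay sketch (model_dqn_sched is a hypothetical name, not a model used below):
def linear_schedule(initial_lr: float):
    # SB3 calls this with progress_remaining in [1.0, 0.0]
    def schedule(progress_remaining: float) -> float:
        return progress_remaining * initial_lr
    return schedule

model_dqn_sched = DQN("MlpPolicy", training_env, device="cpu",
                      learning_rate=linear_schedule(1e-3),
                      gamma=0.9, policy_kwargs=policy_kwargs)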
n_steps = len(train_subset)
rew_dqn, _ = predict_from_env(model_dqn, training_env, n_steps)
act_dqn, date_codes = predict_from_frame(model_dqn, train_subset[features])
action: [0] - reward : [2.55323299] - done: [False]
action: [1] - reward : [3.5383097] - done: [False]
[... step-level prediction output truncated ...]
plot_default_rate_history(date_codes=date_codes,
                          actual=train_subset[default_col],
                          actions=act_dqn,
                          dates_from_codes=dates_from_codes,
                          save_path="../imgs/2_modeling/dfr_history_dqn.png",
                          title="Vanilla DQN - Actual vs. Predicted")
plot_reward_history(reward=rew_dqn,
                    window_size=10,
                    save_path="../imgs/2_modeling/rew_history_dqn.png",
                    title="Vanilla DQN - Reward")
plot_actions_running_mean(actions=act_dqn,
                          window_size=64,
                          save_path="../imgs/2_modeling/act_run_mean_dqn.png",
                          title="Vanilla DQN - Actions Running Mean (i.e. default rate)")
# Rows are true labels, columns are predictions:
# [[True Negative,  False Positive],
#  [False Negative, True Positive]]
print("-"*30)
print(pd.DataFrame(confusion_matrix(train_subset[default_col].values, act_dqn), columns=["Pred 0", "Pred 1"], index=["True 0", "True 1"]))
print("-"*30)
print(classification_report(train_subset[default_col].values, act_dqn))
------------------------------
        Pred 0  Pred 1
True 0    6341     110
True 1       4    3193
------------------------------
              precision    recall  f1-score   support

         0.0       1.00      0.98      0.99      6451
         1.0       0.97      1.00      0.98      3197

    accuracy                           0.99      9648
   macro avg       0.98      0.99      0.99      9648
weighted avg       0.99      0.99      0.99      9648
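Before moving on to the policy-gradient methods, the three value-based agents can be compared directly from the prediction vectors already in memory (a quick sketch using the metrics imported at the top):
y_true = train_subset[default_col].values
for name, acts in [("Dueling DQN", act_dueldqn),
                   ("Double DQN", act_ddqn),
                   ("Vanilla DQN", act_dqn)]:
    print(f"{name:12s} accuracy={accuracy_score(y_true, acts):.4f} "
          f"f1={f1_score(y_true, acts):.4f}")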
kwargs = {
    'features_col': features,          # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,                   # actions 0, 1
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
num_envs = 2
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
log_path = "logs/binary/a2c"
logger = create_logger(log_path)
feature_extractor = GRUNetwork
# Now use the custom network in policy_kwargs
policy_kwargs = dict(
    features_extractor_class=feature_extractor,
    features_extractor_kwargs=dict(first_layer=400,
                                   first_layer_dropout=0.1,
                                   output_layer=400,
                                   output_layer_dropout=0.1)
)
model_a2c = A2C("MlpPolicy",
                training_env,
                verbose=0,
                device="cpu",
                learning_rate=1e-3,
                gamma=0.9,
                seed=SEED,
                n_steps=128,
                ent_coef=0.02,
                policy_kwargs=policy_kwargs)
model_a2c.set_logger(logger)
model_a2c.learn(LEARNING_STEPS*num_envs*2, log_interval=1)
model_a2c.save("output/binary/a2c")
default: 0.333 - penalty: -1.89 reward: 2.64 - client: 207.0 month: 1 - done: True
default: 0.333 - penalty: -1.78 reward: 2.65 - client: 207.0 month: 1 - done: True
default: 0.342 - penalty: -1.95 reward: -2.63 - client: 394.0 month: 2 - done: True
[... per-episode debug output from both envs truncated ...]
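For orientation, each A2C update consumes n_steps transitions per environment, so n_steps=128 with num_envs=2 means 256 environment steps per gradient update; a sketch of the implied update count:
steps_per_update = model_a2c.n_steps * num_envs   # 128 * 2 = 256
total_timesteps = LEARNING_STEPS * num_envs * 2   # budget passed to learn()
print(f"~{total_timesteps // steps_per_update} gradient updates over training")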
n_steps = len(train_subset)
rew_a2c, _ = predict_from_env(model_a2c, training_env, n_steps)
act_a2c, date_codes = predict_from_frame(model_a2c, train_subset[features])
action: [1 1] - reward : [3.60428888 3.60428888] - done: [False False]
action: [1 1] - reward : [3.60744913 3.60744913] - done: [False False]
[... step-level prediction output truncated ...]
plot_default_rate_history(date_codes = date_codes,
actual = train_subset[default_col],
actions = act_a2c,
dates_from_codes = dates_from_codes,
title = "A2C - Actual vs. Predicted",
save_path="../imgs/2_modeling/dfr_history_a2c.png")
plot_reward_history(reward = rew_a2c,
window_size=10,
title = "A2C - Reward",
save_path="../imgs/2_modeling/rew_history_a2c.png")
plot_actions_running_mean(actions=act_a2c,
window_size=64,
title="A2C - Actions Running Mean (i.e. default rate)",
save_path="../imgs/2_modeling/act_run_mean_a2c.png")
print("-"*30)
print(pd.DataFrame(confusion_matrix(train_subset[default_col].values, act_a2c), index=["True 0", "True 1"], columns=["Pred 0", "Pred 1"]))
print("-"*30)
print(classification_report(train_subset[default_col].values, act_a2c))
------------------------------
Pred 0 Pred 1
True 0 6347 104
True 1 265 2932
------------------------------
precision recall f1-score support
0.0 0.96 0.98 0.97 6451
1.0 0.97 0.92 0.94 3197
accuracy 0.96 9648
macro avg 0.96 0.95 0.96 9648
weighted avg 0.96 0.96 0.96 9648
kwargs = {
    'features_col': features,  # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,  # actions: 0, 1
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
num_envs = 1
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for i in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
log_path = "logs/binary/trpo"
logger = create_logger(log_path)
feature_extractor = GRUNetwork
# Now use the custom network in policy_kwargs
policy_kwargs = dict(
features_extractor_class=feature_extractor,
features_extractor_kwargs=dict(first_layer=400,
first_layer_dropout=0.3,
output_layer=400,
output_layer_dropout=0.1)
)
model_trpo = TRPO("MlpPolicy",
                  training_env,
                  verbose=0,
                  seed=SEED,
                  device="cpu",
                  learning_rate=1e-3,
                  batch_size=64,
                  n_steps=1024,  # 4096, 2048
                  gamma=0.9,
                  policy_kwargs=policy_kwargs)
model_trpo.set_logger(logger)
model_trpo.learn(LEARNING_STEPS*num_envs*2,log_interval=1)
model_trpo.save("output/binary/trpo")
[debug output truncated: monthly episode summaries (default rate, penalty, reward, client, month) printed during TRPO training]
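As a quick sanity check of the saved policy, SB3's evaluate_policy helper (imported at the top of the notebook) can score it on the training environment. A minimal sketch, where n_eval_episodes=20 is an arbitrary choice:
# Sketch: greedy evaluation of the trained TRPO policy; n_eval_episodes is arbitrary.
mean_rew, std_rew = evaluate_policy(model_trpo, training_env, n_eval_episodes=20, deterministic=True)
print(f"TRPO mean episodic reward: {mean_rew:.2f} +/- {std_rew:.2f}")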
n_steps = len(train_subset)
rew_trpo, _ = predict_from_env(model_trpo, training_env, n_steps)
act_trpo, date_codes = predict_from_frame(model_trpo, train_subset[features])
[debug output truncated: per-step action/reward traces and monthly episode summaries from the TRPO evaluation rollout]
plot_default_rate_history(date_codes = date_codes,
actual = train_subset[default_col],
actions = act_trpo,
dates_from_codes = dates_from_codes,
save_path="../imgs/2_modeling/dfr_history_trpo.png",
title = "TRPO - Actual vs. Predicted")
plot_reward_history(reward = rew_trpo,
window_size=10,
save_path="../imgs/2_modeling/rew_history_trpo.png",
title = "TRPO - Reward")
plot_actions_running_mean(actions=act_trpo,
window_size=64,
save_path="../imgs/2_modeling/act_run_mean_trpo.png",
title="TRPO - Actions Running Mean (i.e. default rate)")
print("-"*30)
print(pd.DataFrame(confusion_matrix(train_subset[default_col].values, act_trpo), index=["True 0", "True 1"], columns=["Pred 0", "Pred 1"]))
print("-"*30)
print(classification_report(train_subset[default_col].values, act_trpo))
------------------------------
Pred 0 Pred 1
True 0 6113 338
True 1 700 2497
------------------------------
precision recall f1-score support
0.0 0.90 0.95 0.92 6451
1.0 0.88 0.78 0.83 3197
accuracy 0.89 9648
macro avg 0.89 0.86 0.87 9648
weighted avg 0.89 0.89 0.89 9648
kwargs = {
    'features_col': features,  # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,  # actions: 0, 1
    'client_dim': len(train_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
num_envs = 1
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for i in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
log_path = "logs/binary/ppo"
logger = create_logger(log_path)
feature_extractor = GRUNetwork
# Now use the custom network in policy_kwargs
policy_kwargs = dict(
features_extractor_class=feature_extractor,
features_extractor_kwargs=dict(first_layer=400,
first_layer_dropout=0.3,
output_layer=400,
output_layer_dropout=0.1)
)
model_ppo = PPO("MlpPolicy",
                training_env,
                verbose=0,
                seed=SEED,
                device="cpu",
                learning_rate=1e-3,
                batch_size=64,
                n_steps=1024-10,  # 4096, 2048
                gamma=0.9,
                policy_kwargs=policy_kwargs)
model_ppo.set_logger(logger)
_ = training_env.reset()
model_ppo.learn(LEARNING_STEPS*num_envs*2,log_interval=1)
model_ppo.save("output/binary/ppo")
[debug output truncated: monthly episode summaries (default rate, penalty, reward, client, month) printed during PPO training]
n_steps = len(train_subset)
rew_ppo, _ = predict_from_env(model_ppo, training_env, n_steps)
act_ppo, date_codes = predict_from_frame(model_ppo, train_subset[features])
[debug output truncated: per-step action/reward traces and monthly episode summaries from the PPO evaluation rollout]
plot_default_rate_history(date_codes=date_codes,
                          actual=train_subset[default_col],
                          actions=act_ppo,
                          dates_from_codes=dates_from_codes,
                          save_path="../imgs/2_modeling/dfr_history_ppo.png",
                          title="PPO - Actual vs. Predicted")
plot_reward_history(reward=rew_ppo,
                    window_size=10,
                    save_path="../imgs/2_modeling/rew_history_ppo.png",
                    title="PPO - Reward")
plot_actions_running_mean(actions=act_ppo,
                          window_size=64,
                          save_path="../imgs/2_modeling/act_run_mean_ppo.png",
                          title="PPO - Actions Running Mean (i.e. default rate)")
print("-"*30)
print(pd.DataFrame(confusion_matrix(train_subset[default_col].values, act_ppo), index=["True 0", "True 1"], columns=["Pred 0", "Pred 1"]))
print("-"*30)
print(classification_report(train_subset[default_col].values, act_ppo))
------------------------------
Pred 0 Pred 1
True 0 6047 404
True 1 542 2655
------------------------------
precision recall f1-score support
0.0 0.92 0.94 0.93 6451
1.0 0.87 0.83 0.85 3197
accuracy 0.90 9648
macro avg 0.89 0.88 0.89 9648
weighted avg 0.90 0.90 0.90 9648
# stage 1: prediction
model_gb = HistGradientBoostingClassifier(max_iter=12)
model_gb.fit(train_subset[features], train_subset[default_col])
probs_gb = model_gb.predict_proba(train_subset[features])
# stage 2: classification
act_gb = np.where(probs_gb[:, 1] >= 0.5, 1, 0)  # <- threshold 0.5 needs tuning
print(classification_report(train_subset[default_col], act_gb))
precision recall f1-score support
0.0 1.00 1.00 1.00 6451
1.0 1.00 1.00 1.00 3197
accuracy 1.00 9648
macro avg 1.00 1.00 1.00 9648
weighted avg 1.00 1.00 1.00 9648
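Note that the perfect scores above are measured on the same rows the classifier was fit on, so they mainly show that the booster can memorize the training subset. A held-out check is fairer; a minimal sketch with cross_val_score (cv=5 is an arbitrary choice, and for panel data a time-aware splitter such as TimeSeriesSplit would be more faithful):
from sklearn.model_selection import cross_val_score
# Sketch: out-of-fold F1 for the gradient boosting baseline (cv=5 is arbitrary).
cv_f1 = cross_val_score(HistGradientBoostingClassifier(max_iter=12),
                        train_subset[features], train_subset[default_col],
                        cv=5, scoring="f1")
print(f"CV F1: {cv_f1.mean():.3f} +/- {cv_f1.std():.3f}")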
plot_default_rate_history(date_codes = date_codes,
actual = train_subset[default_col],
actions = act_gb,
dates_from_codes = dates_from_codes,
save_path="../imgs/2_modeling/dfr_history_gb.png",
title = "Gradient Boosting - Actual vs. Predicted (threshold = 0.5)")
# compute the threshold that maximizes R2
optimal_threshold = 0.0
max_r2 = float('-inf')
for threshold in np.linspace(0, 1, 101):
    act_gb = np.where(probs_gb[:, 1] >= threshold, 1, 0)
    score = r2_score(train_subset[default_col], act_gb)
    if score > max_r2:
        max_r2 = score
        optimal_threshold = threshold
# stage 2: classification with the tuned threshold
act_gb = np.where(probs_gb[:, 1] >= optimal_threshold, 1, 0)
print(classification_report(train_subset[default_col], act_gb))
precision recall f1-score support
0.0 1.00 1.00 1.00 6451
1.0 1.00 1.00 1.00 3197
accuracy 1.00 9648
macro avg 1.00 1.00 1.00 9648
weighted avg 1.00 1.00 1.00 9648
plot_default_rate_history(date_codes = date_codes,
actual = train_subset[default_col],
actions = act_gb,
dates_from_codes = dates_from_codes,
title = f"Gradient Boosting - Actual vs. Predicted (Optimal Threshold: {optimal_threshold})")
default_rate_history = pd.DataFrame({"date": [dates_from_codes[x] for x in date_codes],
                                     "actual": train_subset[default_col],
                                     "dqn": act_dqn,
                                     "doubledqn": act_ddqn,
                                     "dueldqn": act_dueldqn,
                                     "a2c": act_a2c,
                                     "trpo": act_trpo,
                                     "ppo": act_ppo,
                                     }).groupby("date").mean()
default_rate_history
| date | actual | dqn | doubledqn | dueldqn | a2c | trpo | ppo |
|---|---|---|---|---|---|---|---|
| 2020-03-31 | 0.333333 | 0.338308 | 0.333333 | 0.333333 | 0.318408 | 0.315091 | 0.298507 |
| 2020-04-30 | 0.341625 | 0.346600 | 0.343284 | 0.341625 | 0.326700 | 0.313433 | 0.291874 |
| 2020-05-31 | 0.341625 | 0.343284 | 0.341625 | 0.341625 | 0.328358 | 0.318408 | 0.296849 |
| 2020-06-30 | 0.356551 | 0.361526 | 0.361526 | 0.358209 | 0.339967 | 0.331675 | 0.300166 |
| 2020-07-31 | 0.318408 | 0.343284 | 0.341625 | 0.334992 | 0.316750 | 0.316750 | 0.291874 |
| 2020-08-31 | 0.336650 | 0.344942 | 0.351575 | 0.343284 | 0.315091 | 0.301824 | 0.315091 |
| 2020-09-30 | 0.331675 | 0.338308 | 0.341625 | 0.334992 | 0.321725 | 0.281924 | 0.311774 |
| 2020-10-31 | 0.318408 | 0.325041 | 0.328358 | 0.320066 | 0.303483 | 0.268657 | 0.308458 |
| 2020-11-30 | 0.318408 | 0.321725 | 0.321725 | 0.320066 | 0.301824 | 0.275290 | 0.318408 |
| 2020-12-31 | 0.301824 | 0.316750 | 0.315091 | 0.311774 | 0.290216 | 0.271973 | 0.325041 |
| 2021-01-31 | 0.316750 | 0.341625 | 0.336650 | 0.331675 | 0.300166 | 0.281924 | 0.325041 |
| 2021-02-28 | 0.336650 | 0.353234 | 0.338308 | 0.339967 | 0.313433 | 0.285240 | 0.334992 |
| 2021-03-31 | 0.333333 | 0.343284 | 0.343284 | 0.338308 | 0.305141 | 0.275290 | 0.330017 |
| 2021-04-30 | 0.336650 | 0.354892 | 0.346600 | 0.339967 | 0.313433 | 0.280265 | 0.334992 |
| 2021-05-31 | 0.344942 | 0.354892 | 0.351575 | 0.348259 | 0.318408 | 0.291874 | 0.341625 |
| 2021-06-30 | 0.334992 | 0.349917 | 0.346600 | 0.349917 | 0.321725 | 0.291874 | 0.348259 |
err_results = []
# Iterate over the model columns in the DataFrame
for model in default_rate_history.columns.drop('actual'):
    # mean absolute error
    mae = mean_absolute_error(default_rate_history['actual'], default_rate_history[model])
    # R2 score
    r2 = r2_score(default_rate_history['actual'], default_rate_history[model])
    # Append the results to the list
    err_results.append({'model': model, 'r2': r2, 'mae': mae})
# Convert the list of results to a DataFrame and sort by R2
results_df = pd.DataFrame(err_results)
results_df.sort_values(by="r2", ascending=False)
| | model | r2 | mae |
|---|---|---|---|
| 2 | dueldqn | 0.654698 | 0.005390 |
| 1 | doubledqn | 0.284155 | 0.008810 |
| 0 | dqn | -0.003597 | 0.010987 |
| 3 | a2c | -0.880987 | 0.016687 |
| 5 | ppo | -3.165836 | 0.019900 |
| 4 | trpo | -8.648262 | 0.037521 |
ax = default_rate_history.plot(figsize=(14, 5))
# Set line styles and colors: actual in thick solid gray, models dotted
ax.lines[0].set_linestyle("-")
ax.lines[0].set_color("gray")
ax.lines[0].set_linewidth(3)
for line in ax.lines[1:]:
    line.set_linestyle(':')
for line in ax.lines:
    line.set_linewidth(3)
# Find the longest string length for model names
longest_model_name = max(len(model) for model in results_df.model)
# Find the maximum length of the MAE strings to align the decimal points
max_mae_length = max(len(f"{mae:.4f}") for mae in results_df.mae)
# Create legend labels with padded model names and aligned MAE values
legend_labels = ['Actual'] + [
f"MAE: {model}{' '*(longest_model_name - len(model))} : {mae:>{max_mae_length}.4f}"
for model, mae in zip(results_df.model, results_df.mae)
]
# Set legend with formatted labels using a monospaced font
ax.legend(legend_labels, loc='lower left', prop={'family': 'monospace', 'size': 12})
plt.title("Default Rate Predicted vs. Real", fontsize=20)
plt.ylabel("Default Rate", fontsize=15)
plt.xlabel("Date", fontsize=15)
# Set minimum y axis
plt.ylim(0.22, 0.365)
# Save figure before calling show()
plt.savefig("../imgs/2_modeling/predicted_vs_real_all.png", dpi=300)
# Show the plot
plt.show()
plt.figure(figsize=(10, 5))
ax = sns.kdeplot(data=default_rate_history, legend=True, common_norm=False, fill=True, linewidth=1.5, alpha=0.1)
plt.savefig("../imgs/2_modeling/default_rate_distribution_all.png", dpi=300)
from sklearn.linear_model import Ridge
model_ridge = Ridge(alpha=1e-4, fit_intercept=False, max_iter=1_000)
X_ensemble = default_rate_history[["dqn", "doubledqn", "dueldqn", "a2c", "trpo", "ppo"]]
y_ensemble = default_rate_history["actual"]
model_ridge.fit(X_ensemble, y_ensemble)
y_ensemble_pred = model_ridge.predict(X_ensemble)
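Since fit_intercept=False, the fitted coefficients act directly as blending weights over the six agents' monthly default-rate series; a small sketch to inspect them:
# Sketch: learned stacking weight per base model.
print(pd.Series(model_ridge.coef_, index=X_ensemble.columns).sort_values(ascending=False))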
a = pd.DataFrame({"date_codes":date_codes,
"actual": train_subset[default_col],
# "dqn":act_dqn,
# "doubledqn":act_ddqn,
# "dueldqn":act_dueldqn,
# "a2c":act_a2c,
# "trpo":act_trpo,
# "ppo":act_ppo,
# "gb":act_gb,
# "mean" : np.quantile([act_dqn, act_ddqn, act_dueldqn, act_a2c, act_trpo, act_ppo], q=0.525, axis=0),
}).groupby("date_codes").mean()
ax = a.plot(figsize=(10, 5),)
ax.plot(a.index, y_ensemble_pred, label="stack")
ax.lines[0].set_color("gray")
ax.lines[1].set_linestyle(":")
ax.lines[1].set_linewidth(3)
plt.legend()
# Set R2 score as title
plt.suptitle("Default Rate from Stacked Ensemble vs Real", fontsize=12)
plt.title(f"R2 Score: {r2_score(y_ensemble, y_ensemble_pred): .4f}", fontsize=12)
plt.ylabel("Default Rate", fontsize=12)
plt.xlabel("Date", fontsize=12)
plt.savefig("../imgs/2_modeling/default_rate_history Ensemble.png", dpi=300)
# violin plot of monthly default rates for all models
plt.figure(figsize=(10, 5))
sns.violinplot(data=default_rate_history[['actual', 'dqn', 'doubledqn', 'dueldqn', 'ppo', 'a2c', 'trpo']], alpha=0.5, palette="pastel", inner="point", orient="h")
plt.xlabel("Default Rate", fontsize=12)
plt.axvline(x=default_rate_history.actual.mean(), color="red", linestyle="dashed", linewidth=2, label="actual mean")
plt.axvline(x=default_rate_history.actual.quantile(.75), color="red", linestyle=":", label="actual 75th percentile")
plt.axvline(x=default_rate_history.actual.quantile(.25), color="red", linestyle=":", label="actual 25th percentile")
plt.savefig("../imgs/2_modeling/default_rate_violin_all.png", dpi=300)
error = default_rate_history.subtract(default_rate_history["actual"], axis=0).drop("actual", axis=1)
plt.figure(figsize=(10, 5))
sns.violinplot(data=error[list(error.mean().sort_values().index)[::-1]], alpha=0.5, palette="pastel", inner="point", orient="h")
plt.xlabel("Default Rate Error", fontsize=12)
plt.axvline(x=0, color="red", linestyle="dashed", linewidth=2, label="zero")
plt.savefig("../imgs/2_modeling/default_rate_error_violin_all.png", dpi=300)
folder_path = 'logs/binary/'
progress_csv = get_csv_files(folder_path)
df_progress = {x.split('/')[-2]: pd.read_csv(x) for x in progress_csv}
plt.figure(figsize=(10, 5))
plt.plot(running_mean_last_n_samples(df_progress['dqn'][['train/loss']].fillna(0), 10), label="DQN")
plt.plot(running_mean_last_n_samples(df_progress['dueldqn'][['train/loss']].fillna(0), 10), label="Dueling DQN")
plt.plot(running_mean_last_n_samples(df_progress['ddqn'][['train/loss']].fillna(0), 10), label="Double DQN")
plt.title("Training Policy Loss", fontsize=12)
plt.ylabel("Loss", fontsize=12)
plt.xlabel("Timesteps", fontsize=12)
plt.legend(fontsize=12, loc="lower right")
plt.savefig("../imgs/2_modeling/policy_loss_dqn_family.png", dpi=300)
plt.figure(figsize=(10, 5))
ax = plt.gca()
ax.plot(running_mean_last_n_samples(df_progress['trpo']['train/policy_objective'].fillna(0), 3), label='TRPO')
ax.plot(running_mean_last_n_samples(df_progress['ppo']['train/policy_objective'].fillna(0), 3), label='PPO')
# Turn off gridlines for the primary y-axis
ax.grid(True)
# Create a twin axis for the third plot
ax2 = ax.twinx()
# Plot the third line on the secondary y-axis
ax2.plot(running_mean_last_n_samples(df_progress['a2c']['train/policy_loss'].fillna(0), 125), color='green', linestyle='dashed', label='A2C')
# Turn off gridlines for the secondary y-axis
ax2.grid(False)
ax2.set_ylabel("A2C Policy Loss")
ax.set_ylabel("TRPO, PPO Policy Loss")
# Optional: Add legend
ax.legend(loc='lower left')
ax2.legend(loc='upper right')
plt.savefig("../imgs/2_modeling/policy_loss_poligradient_family.png", dpi=300)
# Show the plot
plt.show()
plt.figure(figsize=(10, 5))
sns.kdeplot((running_mean_last_n_samples(df_progress['dqn'][['train/loss']].fillna(0), 10)
             + running_mean_last_n_samples(df_progress['dueldqn'][['train/loss']].fillna(0), 10)
             + running_mean_last_n_samples(df_progress['ddqn'][['train/loss']].fillna(0), 10)) / 3, label="Value-Based")
sns.kdeplot((running_mean_last_n_samples(df_progress['trpo']['train/policy_objective'].fillna(0), 10)
             + running_mean_last_n_samples(df_progress['a2c']['train/policy_loss'].fillna(0), 132)
             + running_mean_last_n_samples(df_progress['ppo']['train/policy_objective'].fillna(0), 1)) / 3, label="Policy-Based")
plt.legend()
plt.savefig("../imgs/2_modeling/policy_loss_kde_between_families.png", dpi=300)
plt.figure(figsize=(10, 5))
W = 100
# cumulative mean of the smoothed rewards, one line per agent
for rew, color, style in zip([rew_dqn, rew_ddqn, rew_dueldqn, rew_a2c, rew_trpo, rew_ppo],
                             ["red", "blue", "green", "purple", "orange", "black"],
                             ["-", "-", "-", "-.", "-.", "-."]):
    smoothed = running_mean_last_n_samples(rew, W).cumsum()
    plt.plot(smoothed / (np.arange(len(smoothed)) + 1), color=color, linestyle=style)
plt.xlim([-570, 3000])
plt.axvline(0, color="gray", linestyle="--")
plt.legend(["DQN", "DDQN", "DUEL-DDQN", "A2C", "TRPO", "PPO"], loc="lower left")
plt.ylabel("Cumulative Reward", fontsize=12)
plt.xlabel("Episodes", fontsize=12)
# Set the title before saving so it appears in the saved image
plt.title("Smoothed Cumulative Reward", fontsize=14)
plt.savefig("../imgs/2_modeling/smoothed_cumulative_rewards_train.png", dpi=300, bbox_inches='tight')
plt.figure(figsize=(10, 5))
# plot order fixed (TRPO before PPO) so the lines match the legend entries
for rew, color, style in zip([rew_dqn, rew_ddqn, rew_dueldqn, rew_a2c, rew_trpo, rew_ppo],
                             ["blue", "green", "purple", "orange", "gray", "black"],
                             ["-", "-", "-.", "-.", "-.", "-."]):
    plt.plot(running_mean_last_n_samples(rew, 250), color=color, linestyle=style)
plt.xlim([-550, 3000])
plt.axvline(0, color="gray", linestyle="--")
plt.ylabel("Absolute Rewards", fontsize=12)
plt.xlabel("Episodes", fontsize=12)
plt.legend(["DQN", "DDQN", "DUELDDQN", "A2C", "TRPO", "PPO"], loc="lower left")
plt.savefig("../imgs/2_modeling/smoothed_absolute_rewards_train.png", dpi=300, bbox_inches='tight')
fpath = "../data/dataset_1/processed/test_subset.csv"
test_subset_original = pd.read_csv(fpath)
test_subset = test_subset_original.copy()
test_subset.head()
| | mispay_days | date_code | mispay_d_90d_12m | total_balance | client_id | income | age | mortgage | mortgage_qty | maturity_months | monthly_payment | subsidy | interest_rate | default_120_12m |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 4.0 | 16 | 22.0 | 27547.84 | 0.0 | 947.62899 | 47.0 | 41657.5 | 1.0 | 386.0 | 238.72 | 0 | 6.0 | 0.0 |
| 1 | 11.0 | 17 | 22.0 | 27470.91 | 0.0 | 947.62899 | 47.0 | 41657.5 | 1.0 | 386.0 | 238.72 | 0 | 6.0 | 0.0 |
| 2 | 5.0 | 18 | 22.0 | 27402.76 | 0.0 | 947.62899 | 47.0 | 41657.5 | 1.0 | 386.0 | 238.72 | 0 | 6.0 | 0.0 |
| 3 | 7.0 | 19 | 23.0 | 27334.26 | 0.0 | 947.62899 | 47.0 | 41657.5 | 1.0 | 386.0 | 238.72 | 0 | 6.0 | 0.0 |
| 4 | 8.0 | 20 | 23.0 | 27260.85 | 0.0 | 947.62899 | 47.0 | 41657.5 | 1.0 | 386.0 | 238.72 | 0 | 6.0 | 0.0 |
test_subset = test_subset.sort_values(by=['date_code', 'client_id']).set_index(["date_code", "client_id"])
# RBF feature transformation
FEATURE_EXPANSION = False
if FEATURE_EXPANSION:
    test_transformed = pd.DataFrame(rbf_feature.transform(test_subset[original_features]), columns=features)
    test_subset = test_subset[["default_120_12m"]].reset_index().merge(test_transformed, left_index=True, right_index=True)
    test_subset = test_subset.sort_values(by=['date_code', 'client_id']).set_index(["date_code", "client_id"])
SCALE = True
if SCALE:
    features = original_features if not FEATURE_EXPANSION else features
    test_subset[features] = scaler.transform(test_subset[features])
    test_subset[features] = test_subset[features].clip(-1, 1)
test_date_map = dict(zip(test_subset.index.get_level_values(0).unique(), range(len(test_subset.index.get_level_values(0).unique()))))
test_subset = test_subset.reset_index()
test_subset['date_code'] = test_subset['date_code'].map(test_date_map)
test_subset = test_subset.set_index(["date_code", "client_id"])
test_subset.head()
| date_code | client_id | mispay_days | mispay_d_90d_12m | total_balance | income | age | mortgage | mortgage_qty | maturity_months | monthly_payment | subsidy | interest_rate | default_120_12m |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | -0.994949 | -0.973430 | -0.930714 | -0.951302 | -0.098039 | -0.923808 | -1.0 | -0.086888 | -0.930482 | -1.0 | -0.076923 | 0.0 |
| | 1.0 | -1.000000 | -0.834541 | -0.981803 | -0.932394 | 0.215686 | -0.995606 | -1.0 | -0.131122 | -0.989217 | -1.0 | 0.307692 | 1.0 |
| | 2.0 | -1.000000 | -0.998792 | -0.909692 | -0.984283 | -0.647059 | -0.927198 | -1.0 | -0.083728 | -0.967763 | 1.0 | -0.384615 | 0.0 |
| | 3.0 | -0.997475 | -0.824879 | -0.927912 | -0.956700 | -0.137255 | -0.919504 | -1.0 | -0.169036 | -0.920021 | -1.0 | -0.153846 | 1.0 |
| | 4.0 | -1.000000 | -0.987923 | -0.874285 | -0.997007 | -0.647059 | -0.896277 | -1.0 | 0.339652 | -0.949454 | 1.0 | -0.384615 | 0.0 |
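Because the environment assumes date codes starting at 0, a cheap assertion on the remapped test index guards against silent misalignment; a sketch:
# Sketch: remapped test date codes should be contiguous and start at 0.
test_codes = test_subset.index.get_level_values(0).unique()
assert test_codes.min() == 0 and list(test_codes) == list(range(len(test_codes)))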
LEARNING_STEPS = len(test_subset)
# Define environment configuration
kwargs = {
    'features_col': features,  # feature columns
    'default_col': 'default_120_12m',  # default column
    'obs_dim': len(features),
    'action_dim': 2,  # actions: 0, 1
    'client_dim': len(test_subset.loc[0].index.unique()),  # number of clients
    'model_name': "test"
}
TestCustomEnv = RiskManagementEnvMonthlyEpisodes
num_envs = 1
env_fns = [lambda: TestCustomEnv(test_subset, debug=True, scaled_features=SCALE, rng=rng, seed=SEED, accepts_discrete_action=True, **kwargs) for i in range(num_envs)]
test_env = SubprocVecEnv(env_fns)
test_dqn_act, test_date_codes = predict_from_frame(model_dqn, test_subset[features])
test_ddqn_act, _ = predict_from_frame(model_ddqn, test_subset[features])
test_duelddqn_act, _ = predict_from_frame(model_dueldqn, test_subset[features])
test_a2c_act, _ = predict_from_frame(model_a2c, test_subset[features])
test_trpo_act, _ = predict_from_frame(model_trpo, test_subset[features])
test_ppo_act, _ = predict_from_frame(model_ppo, test_subset[features])
test_dqn_rew, _ = predict_from_env(model_dqn, test_env, n_steps=len(test_subset))
test_ddqn_rew, _ = predict_from_env(model_ddqn, test_env, n_steps=len(test_subset))
test_dueldqn_rew, _ = predict_from_env(model_dueldqn, test_env, n_steps=len(test_subset))
test_a2c_rew, _ = predict_from_env(model_a2c, test_env, n_steps=len(test_subset))
test_trpo_rew, _ = predict_from_env(model_trpo, test_env, n_steps=len(test_subset))
test_ppo_rew, _ = predict_from_env(model_ppo, test_env, n_steps=len(test_subset))
[debug output truncated: per-step action/reward traces and monthly episode summaries from the test-environment rollouts]
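Before plotting, the step rewards collected above can be summarized numerically; a sketch, assuming each *_rew is a 1-D array of per-step rewards:
# Sketch: mean step reward per agent on the test environment.
test_rews = {"dqn": test_dqn_rew, "ddqn": test_ddqn_rew, "dueldqn": test_dueldqn_rew,
             "a2c": test_a2c_rew, "trpo": test_trpo_rew, "ppo": test_ppo_rew}
print(pd.Series({k: np.mean(v) for k, v in test_rews.items()}).sort_values(ascending=False))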
plt.figure(figsize=(10, 5))
W = 100
# cumulative mean of the smoothed test rewards, one line per agent
for rew, color, style in zip([test_dqn_rew, test_ddqn_rew, test_dueldqn_rew, test_a2c_rew, test_trpo_rew, test_ppo_rew],
                             ["red", "blue", "green", "purple", "orange", "black"],
                             ["-", "-", "-", "-.", "-.", "-."]):
    smoothed = running_mean_last_n_samples(rew, W).cumsum()
    plt.plot(smoothed / (np.arange(len(smoothed)) + 1), color=color, linestyle=style)
plt.xlim([-550, 3000])
plt.axvline(0, color="gray", linestyle="--")
plt.legend(["DQN", "DDQN", "DUEL-DDQN", "A2C", "TRPO", "PPO"], loc="lower left")
plt.ylabel("Cumulative Reward", fontsize=12)
plt.xlabel("Episodes", fontsize=12)
# Set the title before saving so it appears in the saved image
plt.title("Smoothed Cumulative Reward", fontsize=14)
plt.savefig("../imgs/2_modeling/smoothed_cumulative_rewards_test.png", dpi=300, bbox_inches='tight')
plt.figure(figsize=(10, 5))
# DQN added so the six plotted lines match the six legend entries below
for rew, color, style in zip([test_dqn_rew, test_ddqn_rew, test_dueldqn_rew, test_a2c_rew, test_trpo_rew, test_ppo_rew],
                             ["red", "blue", "green", "purple", "orange", "black"],
                             ["-", "-", "-", "-.", "-.", "-."]):
    plt.plot(running_mean_last_n_samples(rew, 250), color=color, linestyle=style)
plt.xlim([-550, 3000])
plt.axvline(0, color="gray", linestyle="--")
plt.ylabel("Absolute Rewards", fontsize=12)
plt.xlabel("Episodes", fontsize=12)
plt.legend(["DQN", "DDQN", "DUELDDQN", "A2C", "TRPO", "PPO"], loc="lower left")
plt.savefig("../imgs/2_modeling/smoothed_absolute_rewards_test.png", dpi=300, bbox_inches='tight')
test_gb_preds = model_gb.predict(test_subset[features])
print(classification_report(test_subset.default_120_12m.values,test_gb_preds))
precision recall f1-score support
0.0 1.00 1.00 1.00 1969
1.0 1.00 1.00 1.00 1046
accuracy 1.00 3015
macro avg 1.00 1.00 1.00 3015
weighted avg 1.00 1.00 1.00 3015
# print a classification report for each policy's test-set actions
print("-"*30, "test_dqn_act", "-"*30)
print(classification_report(test_subset.default_120_12m.values, test_dqn_act))
print("-"*30, "test_ppo_act", "-"*30)
print(classification_report(test_subset.default_120_12m.values, test_ppo_act))
print("-"*30, "test_a2c_act", "-"*30)
print(classification_report(test_subset.default_120_12m.values, test_a2c_act))
print("-"*30, "test_trpo_act", "-"*30)
print(classification_report(test_subset.default_120_12m.values, test_trpo_act))
print("-"*30, "test_ddqn_act", "-"*30)
print(classification_report(test_subset.default_120_12m.values, test_ddqn_act))
print("-"*30, "test_dueldqn_act", "-"*30)
print(classification_report(test_subset.default_120_12m.values, test_duelddqn_act))
print("-"*30, "test_gb_preds", "-"*30)
test_gb_preds = model_gb.predict(test_subset[features])
print(classification_report(test_subset.default_120_12m.values,test_gb_preds))
------------------------------ test_dqn_act ------------------------------
precision recall f1-score support
0.0 1.00 0.98 0.99 1969
1.0 0.96 0.99 0.98 1046
accuracy 0.98 3015
macro avg 0.98 0.99 0.98 3015
weighted avg 0.98 0.98 0.98 3015
------------------------------ test_ppo_act ------------------------------
precision recall f1-score support
0.0 0.94 0.94 0.94 1969
1.0 0.89 0.89 0.89 1046
accuracy 0.92 3015
macro avg 0.91 0.92 0.91 3015
weighted avg 0.92 0.92 0.92 3015
------------------------------ test_a2c_act ------------------------------
precision recall f1-score support
0.0 0.96 0.99 0.97 1969
1.0 0.97 0.93 0.95 1046
accuracy 0.97 3015
macro avg 0.97 0.96 0.96 3015
weighted avg 0.97 0.97 0.97 3015
------------------------------ test_trpo_act ------------------------------
precision recall f1-score support
0.0 0.89 0.93 0.91 1969
1.0 0.86 0.79 0.83 1046
accuracy 0.88 3015
macro avg 0.88 0.86 0.87 3015
weighted avg 0.88 0.88 0.88 3015
------------------------------ test_ddqn_act ------------------------------
precision recall f1-score support
0.0 0.99 0.99 0.99 1969
1.0 0.98 0.99 0.99 1046
accuracy 0.99 3015
macro avg 0.99 0.99 0.99 3015
weighted avg 0.99 0.99 0.99 3015
------------------------------ test_dueldqn_act ------------------------------
precision recall f1-score support
0.0 1.00 0.99 1.00 1969
1.0 0.99 0.99 0.99 1046
accuracy 0.99 3015
macro avg 0.99 0.99 0.99 3015
weighted avg 0.99 0.99 0.99 3015
------------------------------ test_gb_preds ------------------------------
precision recall f1-score support
0.0 1.00 1.00 1.00 1969
1.0 1.00 1.00 1.00 1046
accuracy 1.00 3015
macro avg 1.00 1.00 1.00 3015
weighted avg 1.00 1.00 1.00 3015
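# The seven reports above are easier to compare side by side. A minimal sketch
# condensing them into a single frame, reusing the metric functions already
# imported and the action arrays defined above:
y_true = test_subset.default_120_12m.values
policy_actions = {"dqn": test_dqn_act, "ddqn": test_ddqn_act,
                  "dueldqn": test_duelddqn_act, "a2c": test_a2c_act,
                  "trpo": test_trpo_act, "ppo": test_ppo_act, "gb": test_gb_preds}
summary = pd.DataFrame({name: {"accuracy": accuracy_score(y_true, acts),
                               "f1": f1_score(y_true, acts),
                               "recall": recall_score(y_true, acts)}
                        for name, acts in policy_actions.items()}).T
summary.sort_values("f1", ascending=False)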
original_test_datecodes = list(test_date_map.keys())
test_datecodes = [dates_from_codes[code] for code in original_test_datecodes]
test_subset_original.shape, test_subset.shape, len(test_dqn_act)
((3015, 14), (3015, 12), 3015)
test_subset_original["date_code"].map(dict(zip(original_test_datecodes, test_datecodes)))
0 2021-07-31
1 2021-08-31
2 2021-09-30
3 2021-10-31
4 2021-11-30
...
3010 2021-07-31
3011 2021-08-31
3012 2021-09-30
3013 2021-10-31
3014 2021-11-30
Name: date_code, Length: 3015, dtype: object
test_default_rate_history = pd.DataFrame({"date": test_subset_original["date_code"].map(dict(zip(original_test_datecodes, test_datecodes))),
"actual": test_subset[default_col].values,
"dqn": test_dqn_act,
"ddqn": test_ddqn_act,
"dueldqn": test_duelddqn_act,
"a2c": test_a2c_act,
"trpo": test_trpo_act,
"ppo": test_ppo_act
# "mean" : np.quantile([test_dqn_act, test_ddqn_act, test_duelddqn_act, test_a2c_act, test_trpo_act], q=0.593, axis=0),
})
test_dfr = test_default_rate_history.groupby("date").mean()
test_dfr
| date | actual | dqn | ddqn | dueldqn | a2c | trpo | ppo |
|---|---|---|---|---|---|---|---|
| 2021-07-31 | 0.348259 | 0.359867 | 0.351575 | 0.348259 | 0.331675 | 0.320066 | 0.353234 |
| 2021-08-31 | 0.339967 | 0.353234 | 0.346600 | 0.346600 | 0.326700 | 0.308458 | 0.339967 |
| 2021-09-30 | 0.354892 | 0.358209 | 0.354892 | 0.356551 | 0.336650 | 0.325041 | 0.358209 |
| 2021-10-31 | 0.343284 | 0.353234 | 0.341625 | 0.344942 | 0.325041 | 0.316750 | 0.341625 |
| 2021-11-30 | 0.348259 | 0.359867 | 0.349917 | 0.348259 | 0.336650 | 0.318408 | 0.349917 |
test_err_results = []
# Compute error metrics for each model column against the actual default rate
for model in test_dfr.columns.drop('actual'):
    # Root mean squared error
    rmse = mean_squared_error(test_dfr['actual'], test_dfr[model], squared=False)
    # Mean absolute error
    mae = mean_absolute_error(test_dfr['actual'], test_dfr[model])
    # R2 score
    r2 = r2_score(test_dfr['actual'], test_dfr[model])
    # Append the results to the list
    test_err_results.append({'model': model, 'r2': r2, 'mae': mae, 'rmse': rmse})
# Convert the list of results to a DataFrame
test_results_df = pd.DataFrame(test_err_results)
# results_df.sort_values(by="rmse", ascending=True)
test_results_df.sort_values(by="r2", ascending=False)
| | model | r2 | mae | rmse |
|---|---|---|---|---|
| 5 | ppo | 0.679487 | 0.002322 | 0.002872 |
| 2 | dueldqn | 0.615385 | 0.001990 | 0.003147 |
| 1 | ddqn | 0.529915 | 0.002653 | 0.003479 |
| 0 | dqn | -3.316239 | 0.009950 | 0.010541 |
| 3 | a2c | -8.722222 | 0.015589 | 0.015820 |
| 4 | trpo | -32.205128 | 0.029187 | 0.029236 |
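# Note that test_dfr has only five monthly rows, so the R2 values above are
# computed over n=5 points and are sensitive to a single month's error. Also,
# recent scikit-learn releases deprecate the squared=False flag; a
# version-tolerant RMSE sketch:
from sklearn.metrics import mean_squared_error
try:
    # available in scikit-learn >= 1.4
    from sklearn.metrics import root_mean_squared_error as rmse_fn
except ImportError:
    def rmse_fn(y_true, y_pred):
        return mean_squared_error(y_true, y_pred, squared=False)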
ax = test_dfr.plot(figsize=(14, 5))
# Style: thick gray solid line for the actual series, dotted lines for models
ax.lines[0].set_linestyle("-")
ax.lines[0].set_color("gray")
ax.lines[0].set_linewidth(3)
for line in ax.lines[1:]:
    line.set_linestyle(':')
    line.set_linewidth(2)
# Find the longest string length for model names
longest_model_name = max(len(model) for model in test_results_df.model)
# Find the maximum length of the MAE strings to align the decimal points
max_mae_length = max(len(f"{mae:.4f}") for mae in test_results_df.mae)
# Create legend labels with padded model names and aligned MAE values
legend_labels = ['Actual'] + [
f"MAE: {model}{' '*(longest_model_name - len(model))} : {mae:>{max_mae_length}.4f}"
for model, mae in zip(test_results_df.model, test_results_df.mae)
]
# Set the legend with formatted labels, using a monospaced font so the
# padded model names and MAE values stay aligned
ax.legend(legend_labels, loc='lower left', prop={'family': 'monospace', 'size': 10})
plt.title("Default Rate Predicted vs. Real", fontsize=20)
plt.ylabel("Default Rate", fontsize=15)
plt.xlabel("Date", fontsize=15)
# Set y-axis limits
plt.ylim(0.27, 0.365)
# Save figure before calling show()
plt.savefig("../imgs/2_modeling/test_default_rate.png", dpi=300)
# Show the plot
plt.show()
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
# return_episode_rewards=True makes evaluate_policy return the per-episode
# reward and length lists (not a mean/std pair), hence the variable names
dqn_ep_rewards, dqn_ep_lengths = evaluate_policy(model_dqn, training_env, n_eval_episodes=15, return_episode_rewards=True)
default: 0.333 - penalty: -1.83 reward: 2.65 - client: 207.0 month: 1 - done: True default: 0.342 - penalty: -1.90 reward: 3.60 - client: 394.0 month: 2 - done: True default: 0.342 - penalty: -1.83 reward: 3.61 - client: 470.0 month: 3 - done: True default: 0.357 - penalty: -1.88 reward: 3.55 - client: 588.0 month: 4 - done: True default: 0.318 - penalty: -1.68 reward: 2.74 - client: 26.0 month: 5 - done: True default: 0.337 - penalty: -1.82 reward: 2.64 - client: 375.0 month: 6 - done: True default: 0.332 - penalty: -1.84 reward: 2.65 - client: 191.0 month: 7 - done: True default: 0.318 - penalty: -1.65 reward: 2.74 - client: 366.0 month: 8 - done: True default: 0.318 - penalty: -1.66 reward: 2.74 - client: 248.0 month: 9 - done: True default: 0.302 - penalty: -1.61 reward: 2.83 - client: 139.0 month: 10 - done: True default: 0.317 - penalty: -1.73 reward: 2.74 - client: 528.0 month: 11 - done: True default: 0.337 - penalty: -1.68 reward: 3.66 - client: 381.0 month: 12 - done: True default: 0.333 - penalty: -1.78 reward: 2.65 - client: 560.0 month: 13 - done: True default: 0.337 - penalty: -1.81 reward: 3.64 - client: 459.0 month: 14 - done: True default: 0.345 - penalty: -1.89 reward: 2.59 - client: 173.0 month: 15 - done: True
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
ddqn_ep_rewards, ddqn_ep_lengths = evaluate_policy(model_ddqn, training_env, n_eval_episodes=15, return_episode_rewards=True)
default: 0.333 - penalty: -1.83 reward: 2.65 - client: 207.0 month: 1 - done: True default: 0.342 - penalty: -1.90 reward: 3.60 - client: 394.0 month: 2 - done: True default: 0.342 - penalty: -1.83 reward: 3.61 - client: 470.0 month: 3 - done: True default: 0.357 - penalty: -1.88 reward: 3.55 - client: 588.0 month: 4 - done: True default: 0.318 - penalty: -1.68 reward: 2.74 - client: 26.0 month: 5 - done: True default: 0.337 - penalty: -1.80 reward: 2.64 - client: 375.0 month: 6 - done: True default: 0.332 - penalty: -1.84 reward: 2.65 - client: 191.0 month: 7 - done: True default: 0.318 - penalty: -1.66 reward: 2.74 - client: 366.0 month: 8 - done: True default: 0.318 - penalty: -1.66 reward: 2.74 - client: 248.0 month: 9 - done: True default: 0.302 - penalty: -1.61 reward: 2.83 - client: 139.0 month: 10 - done: True default: 0.317 - penalty: -1.74 reward: 2.74 - client: 528.0 month: 11 - done: True default: 0.337 - penalty: -1.68 reward: 3.66 - client: 381.0 month: 12 - done: True default: 0.333 - penalty: -1.78 reward: 2.65 - client: 560.0 month: 13 - done: True default: 0.337 - penalty: -1.81 reward: 3.64 - client: 459.0 month: 14 - done: True default: 0.345 - penalty: -1.89 reward: 2.59 - client: 173.0 month: 15 - done: True
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
dueldqn_ep_rewards, dueldqn_ep_lengths = evaluate_policy(model_dueldqn, training_env, n_eval_episodes=15, return_episode_rewards=True)
default: 0.333 - penalty: -1.83 reward: 2.65 - client: 207.0 month: 1 - done: True default: 0.342 - penalty: -1.90 reward: 3.60 - client: 394.0 month: 2 - done: True default: 0.342 - penalty: -1.83 reward: 3.61 - client: 470.0 month: 3 - done: True default: 0.357 - penalty: -1.88 reward: 3.55 - client: 588.0 month: 4 - done: True default: 0.318 - penalty: -1.69 reward: 2.74 - client: 26.0 month: 5 - done: True default: 0.337 - penalty: -1.81 reward: 2.64 - client: 375.0 month: 6 - done: True default: 0.332 - penalty: -1.84 reward: 2.65 - client: 191.0 month: 7 - done: True default: 0.318 - penalty: -1.66 reward: 2.74 - client: 366.0 month: 8 - done: True default: 0.318 - penalty: -1.66 reward: 2.74 - client: 248.0 month: 9 - done: True default: 0.302 - penalty: -1.61 reward: 2.83 - client: 139.0 month: 10 - done: True default: 0.317 - penalty: -1.73 reward: 2.74 - client: 528.0 month: 11 - done: True default: 0.337 - penalty: -1.68 reward: 3.66 - client: 381.0 month: 12 - done: True default: 0.333 - penalty: -1.78 reward: 2.65 - client: 560.0 month: 13 - done: True default: 0.337 - penalty: -1.81 reward: 3.64 - client: 459.0 month: 14 - done: True default: 0.345 - penalty: -1.89 reward: 2.59 - client: 173.0 month: 15 - done: True
plt.plot(np.array(dqn_ep_rewards).cumsum()/np.arange(1, len(dqn_ep_rewards) + 1), label="dqn")
plt.plot(np.array(ddqn_ep_rewards).cumsum()/np.arange(1, len(ddqn_ep_rewards) + 1), label="ddqn")
plt.plot(np.array(dueldqn_ep_rewards).cumsum()/np.arange(1, len(dueldqn_ep_rewards) + 1), label="dueldqn")
plt.legend()
plt.show()
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
a2c_ep_rewards, a2c_ep_lengths = evaluate_policy(model_a2c, training_env, n_eval_episodes=15, return_episode_rewards=True)
default: 0.333 - penalty: -1.80 reward: 2.65 - client: 207.0 month: 1 - done: True default: 0.342 - penalty: -1.92 reward: 3.60 - client: 394.0 month: 2 - done: True default: 0.342 - penalty: -1.84 reward: 3.61 - client: 470.0 month: 3 - done: True default: 0.357 - penalty: -1.87 reward: 3.55 - client: 588.0 month: 4 - done: True default: 0.318 - penalty: -1.71 reward: 2.73 - client: 26.0 month: 5 - done: True default: 0.337 - penalty: -1.83 reward: 2.63 - client: 375.0 month: 6 - done: True default: 0.332 - penalty: -1.85 reward: 2.65 - client: 191.0 month: 7 - done: True default: 0.318 - penalty: -1.71 reward: 2.73 - client: 366.0 month: 8 - done: True default: 0.318 - penalty: -1.66 reward: 2.74 - client: 248.0 month: 9 - done: True default: 0.302 - penalty: -1.63 reward: 2.83 - client: 139.0 month: 10 - done: True default: 0.317 - penalty: -1.76 reward: 2.73 - client: 528.0 month: 11 - done: True default: 0.337 - penalty: -1.72 reward: 3.65 - client: 381.0 month: 12 - done: True default: 0.333 - penalty: -1.82 reward: 2.65 - client: 560.0 month: 13 - done: True default: 0.337 - penalty: -1.84 reward: 3.63 - client: 459.0 month: 14 - done: True default: 0.345 - penalty: -1.89 reward: 2.59 - client: 173.0 month: 15 - done: True
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
trpo_ep_rewards, trpo_ep_lengths = evaluate_policy(model_trpo, training_env, n_eval_episodes=15, return_episode_rewards=True)
default: 0.333 - penalty: -1.80 reward: 2.65 - client: 207.0 month: 1 - done: True default: 0.342 - penalty: -1.93 reward: 3.60 - client: 394.0 month: 2 - done: True default: 0.342 - penalty: -1.86 reward: 3.61 - client: 470.0 month: 3 - done: True default: 0.357 - penalty: -1.86 reward: 3.55 - client: 588.0 month: 4 - done: True default: 0.318 - penalty: -1.70 reward: 2.73 - client: 26.0 month: 5 - done: True default: 0.337 - penalty: -1.84 reward: 2.63 - client: 375.0 month: 6 - done: True default: 0.332 - penalty: -1.88 reward: 2.65 - client: 191.0 month: 7 - done: True default: 0.318 - penalty: -1.74 reward: 2.73 - client: 366.0 month: 8 - done: True default: 0.318 - penalty: -1.70 reward: 2.73 - client: 248.0 month: 9 - done: True default: 0.302 - penalty: -1.57 reward: 2.84 - client: 139.0 month: 10 - done: True default: 0.317 - penalty: -1.77 reward: 2.73 - client: 528.0 month: 11 - done: True default: 0.337 - penalty: -1.73 reward: 3.65 - client: 381.0 month: 12 - done: True default: 0.333 - penalty: -1.82 reward: 2.65 - client: 560.0 month: 13 - done: True default: 0.337 - penalty: -1.86 reward: -2.61 - client: 459.0 month: 14 - done: True default: 0.345 - penalty: -1.95 reward: 2.58 - client: 173.0 month: 15 - done: True
env_fns = [lambda: CustomEnv(train_subset, debug=True, scaled_features=SCALE, seed=SEED, rng=rng, accepts_discrete_action=True, **kwargs) for _ in range(num_envs)]
training_env = SubprocVecEnv(env_fns)
ppo_ep_rewards, ppo_ep_lengths = evaluate_policy(model_ppo, training_env, n_eval_episodes=15, return_episode_rewards=True)
default: 0.333 - penalty: -1.87 reward: 2.64 - client: 207.0 month: 1 - done: True default: 0.342 - penalty: -1.94 reward: 3.60 - client: 394.0 month: 2 - done: True default: 0.342 - penalty: -1.86 reward: 3.61 - client: 470.0 month: 3 - done: True default: 0.357 - penalty: -1.92 reward: 3.54 - client: 588.0 month: 4 - done: True default: 0.318 - penalty: -1.68 reward: 2.74 - client: 26.0 month: 5 - done: True default: 0.337 - penalty: -1.84 reward: 2.63 - client: 375.0 month: 6 - done: True default: 0.332 - penalty: -1.85 reward: 2.65 - client: 191.0 month: 7 - done: True default: 0.318 - penalty: -1.69 reward: 2.74 - client: 366.0 month: 8 - done: True default: 0.318 - penalty: -1.69 reward: 2.74 - client: 248.0 month: 9 - done: True default: 0.302 - penalty: -1.60 reward: 2.83 - client: 139.0 month: 10 - done: True default: 0.317 - penalty: -1.69 reward: 2.74 - client: 528.0 month: 11 - done: True default: 0.337 - penalty: -1.69 reward: 3.66 - client: 381.0 month: 12 - done: True default: 0.333 - penalty: -1.79 reward: -1.60 - client: 560.0 month: 13 - done: True default: 0.337 - penalty: -1.77 reward: 3.64 - client: 459.0 month: 14 - done: True default: 0.345 - penalty: -1.92 reward: 2.59 - client: 173.0 month: 15 - done: True
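# evaluate_policy with return_episode_rewards=True returns the raw per-episode
# reward and length lists rather than a (mean, std) pair, so summary statistics
# are computed explicitly from the lists collected above:
for name, ep_rewards in [("dqn", dqn_ep_rewards), ("ddqn", ddqn_ep_rewards),
                         ("dueldqn", dueldqn_ep_rewards), ("a2c", a2c_ep_rewards),
                         ("trpo", trpo_ep_rewards), ("ppo", ppo_ep_rewards)]:
    print(f"{name}: mean reward {np.mean(ep_rewards):.3f} +/- {np.std(ep_rewards):.3f}")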
plt.figure(figsize=(10, 5))
plt.plot(np.array(dqn_ep_rewards), label="dqn")
plt.plot(np.array(ddqn_ep_rewards), label="ddqn")
plt.plot(np.array(dueldqn_ep_rewards), label="dueldqn")
plt.plot(np.array(a2c_ep_rewards), label="a2c")
plt.plot(np.array(trpo_ep_rewards), label="trpo")
plt.plot(np.array(ppo_ep_rewards), label="ppo")
plt.ylabel("Episode Reward", fontsize=12)
plt.xlabel("Episodes", fontsize=12)
plt.title("Obtained Rewards", fontsize=14)
plt.legend()
plt.savefig("../imgs/2_modeling/obtained_rewards_all.png", dpi=300)
plt.show()
from sklearn.utils.class_weight import compute_class_weight
dummy = train_subset.copy().reset_index()
weight_schedule = []
for i in dummy["date_code"].unique():
    month_values = dummy[dummy["date_code"] == i][default_col].values
    weight_schedule.append(compute_class_weight(class_weight="balanced", classes=[0, 1], y=month_values))
df_weight_schedule = pd.DataFrame(np.array(weight_schedule), columns=["w_0", "w_1"])
df_weight_schedule["date"] = [x for x in list(dates_from_codes.values())[:16]]
# df_weight_schedule["date"] = pd.to_datetime(df_weight_schedule["date"])
df_weight_schedule.set_index("date", inplace=True)
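# For reference, class_weight="balanced" assigns w_c = n_samples / (n_classes * n_c),
# so the weights track each month's default rate. A hand check with hypothetical
# counts at a default rate of exactly 1/3 reproduces the 2020-03-31 row shown below:
n_samples, n_default = 600, 200    # hypothetical counts, default rate = 1/3
n_ok = n_samples - n_default
w_0 = n_samples / (2 * n_ok)       # 0.75
w_1 = n_samples / (2 * n_default)  # 1.50
print(w_0, w_1)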
fig, ax = plt.subplots(figsize=(10, 5))
df_weight_schedule.plot(ax=ax)
ax.set_ylabel("Weight", fontsize=12)
ax.set_xlabel("Date", fontsize=12)
ax.set_title("Class Weights", fontsize=12)
plt.savefig("../imgs/2_modeling/class_weights.png", dpi=300)
plt.show()
df_weight_schedule
| date | w_0 | w_1 |
|---|---|---|
| 2020-03-31 | 0.750000 | 1.500000 |
| 2020-04-30 | 0.759446 | 1.463592 |
| 2020-05-31 | 0.759446 | 1.463592 |
| 2020-06-30 | 0.777062 | 1.402326 |
| 2020-07-31 | 0.733577 | 1.570312 |
| 2020-08-31 | 0.753750 | 1.485222 |
| 2020-09-30 | 0.748139 | 1.507500 |
| 2020-10-31 | 0.733577 | 1.570312 |
| 2020-11-30 | 0.733577 | 1.570312 |
| 2020-12-31 | 0.716152 | 1.656593 |
| 2021-01-31 | 0.731796 | 1.578534 |
| 2021-02-28 | 0.753750 | 1.485222 |
| 2021-03-31 | 0.750000 | 1.500000 |
| 2021-04-30 | 0.753750 | 1.485222 |
| 2021-05-31 | 0.763291 | 1.449519 |
| 2021-06-30 | 0.751870 | 1.492574 |
fig, ax = plt.subplots(figsize=(10, 5))
cost_matrix_schedule = pd.DataFrame({
"C_TP": 2 + df_weight_schedule["w_1"],
"C_FP": -1 - df_weight_schedule["w_0"],
"C_FN": -2 - df_weight_schedule["w_0"],
"C_TN": 1 + df_weight_schedule["w_1"],
})
cost_matrix_schedule.plot(ax=ax)
ax.set_ylabel("Cost", fontsize=12)
ax.set_xlabel("Date", fontsize=12)
ax.set_title("Cost Matrix", fontsize=12)
plt.savefig("../imgs/2_modeling/cost_matrix_schedule.png", dpi=300)
plt.show()
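# The schedule rewards correct decisions (C_TP, C_TN) and penalizes errors
# (C_FP, C_FN), with magnitudes tied to the month's class weights. Below is a
# sketch of the presumed (action, outcome) -> reward lookup; this is an
# assumption about how the schedule would be consumed, not the RiskManagementEnv
# source:
def step_reward(action: int, default: int, costs: pd.Series) -> float:
    if action == 1:
        return costs["C_TP"] if default == 1 else costs["C_FP"]
    return costs["C_FN"] if default == 1 else costs["C_TN"]

# For 2020-03-31 (w_1 = 1.5) a true positive would pay 2 + w_1 = 3.5, which is
# consistent with the ~3.5 rewards visible in the debug logs above.
print(step_reward(1, 1, cost_matrix_schedule.loc["2020-03-31"]))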